Hot and cold days#
https://www.ncei.noaa.gov/data/global-historical-climatology-network-daily/doc/GHCND_documentation.pdf
import warnings
warnings.filterwarnings("ignore")
import os
import sys
import numpy as np
import pandas as pd
from datetime import datetime
sys.path.append("../../../../indicators_setup")
from ind_setup.plotting_int import plot_timeseries_interactive
from ind_setup.colors import get_df_col
sys.path.append("../../../functions")
from data_downloaders import GHCN
from temp_func import exceedance_rate_for_base_period, exceedance_rate_for_outbase_period
Define location and variables of interest#
country = 'Palau'
vars_interest = ['TMIN', 'TMAX']
Get Data#
df_country = GHCN.get_country_code(country)
print(f'The GHCN code for {country} is {df_country["Code"].values[0]}')
The GHCN code for Palau is PS
df_stations = GHCN.download_stations_info()
df_country_stations = df_stations[df_stations['ID'].str.startswith(df_country.Code.values[0])]
print(f'There are {df_country_stations.shape[0]} stations in {country}')
There are 13 stations in Palau
GHCND_dir = 'https://www.ncei.noaa.gov/data/global-historical-climatology-network-daily/access/'
Using Koror Station#
id = 'PSW00040309' # Koror Station
dict_min = GHCN.extract_dict_data_var(GHCND_dir, 'TMIN', df_country_stations.loc[df_country_stations['ID'] == id])[0][0]
dict_max = GHCN.extract_dict_data_var(GHCND_dir, 'TMAX', df_country_stations.loc[df_country_stations['ID'] == id])[0][0]
from datetime import datetime
import pandas as pd
import numpy as np
from datetime import datetime
import pandas as pd
import numpy as np
def exceedance_rate_for_base_period(climate_data, variable_name):
exceedance_rates = {}
all_exceedance_data = {}
base_period_years = range(1961, 1991)
base_period_data = climate_data[climate_data['DATE'].dt.year.isin(base_period_years)]
# Precompute thresholds for all days in the base period
thresholds = {}
for day in base_period_data['DAY'].unique():
thresholds[day] = centered_percentile(day, base_period_data, variable_name)
for out_of_base_year in base_period_years:
out_of_base_data = climate_data[climate_data['DATE'].dt.year == out_of_base_year].copy()
out_of_base_data['THRESHOLD'] = out_of_base_data['DAY'].map(thresholds)
if variable_name == "TMAX":
exceedance_rate = (out_of_base_data[variable_name] > out_of_base_data['THRESHOLD']).mean()
elif variable_name == "TMIN":
exceedance_rate = (out_of_base_data[variable_name] < out_of_base_data['THRESHOLD']).mean()
exceedance_rates[out_of_base_year] = exceedance_rate
all_exceedance_data[out_of_base_year] = {out_of_base_year: exceedance_rate}
return exceedance_rates, all_exceedance_data
def centered_percentile(date, base_df, variable_name):
filtered_df = base_df[(base_df["DATE"] >= datetime(1960, 12, 29)) & (base_df["DATE"] <= datetime(1991, 1, 2))]
window_values = []
for x in filtered_df[filtered_df['DAY'] == date]['DATE']:
window_values.extend(filtered_df[(filtered_df['DATE'] >= x - pd.Timedelta(days=2)) &
(filtered_df['DATE'] <= x + pd.Timedelta(days=2))][variable_name].tolist())
if variable_name == "TMAX":
return np.percentile(window_values, 90)
elif variable_name == "TMIN":
return np.percentile(window_values, 10)
def exceedance_rate_for_outbase_period(climate_data, variable_name):
date_range = pd.date_range('2024-01-01', '2024-12-31', freq='D')
df_exceedance = pd.DataFrame({'DAY': date_range})
df_exceedance['THRESHOLD'] = df_exceedance['DAY'].apply(lambda day_value: centered_percentile(day_value, climate_data, variable_name))
return df_exceedance
def centered_percentile(date, base_df, variable_name):
filtered_df = base_df[(base_df["DATE"] >= datetime(1960, 12, 29)) & (base_df["DATE"] <= datetime(1991, 1, 2))]
window_values = []
for x in filtered_df[filtered_df['DAY'] == date]['DATE']:
window_values.extend(filtered_df[(filtered_df['DATE'] >= x - pd.Timedelta(days=2)) &
(filtered_df['DATE'] <= x + pd.Timedelta(days=2))][variable_name].tolist())
if variable_name == "TMAX":
return np.percentile(window_values, 90)
elif variable_name == "TMIN":
return np.percentile(window_values, 10)
st_data = pd.concat([dict_min['data'], (dict_max['data'])], axis=1).dropna()
st_data['DATE'] = st_data.index
st_data['DAY'] = "2024-" + st_data['DATE'].dt.strftime('%m-%d')
st_data['DAY'] = pd.to_datetime(st_data['DAY'], format='%Y-%m-%d')
st_data.index = range(len(st_data))
st_data
| TMIN | TMAX | DATE | DAY | |
|---|---|---|---|---|
| 0 | 23.9 | 32.2 | 1951-07-01 | 2024-07-01 |
| 1 | 24.4 | 31.7 | 1951-07-02 | 2024-07-02 |
| 2 | 23.9 | 30.6 | 1951-07-03 | 2024-07-03 |
| 3 | 23.3 | 28.9 | 1951-07-04 | 2024-07-04 |
| 4 | 23.9 | 31.1 | 1951-07-05 | 2024-07-05 |
| ... | ... | ... | ... | ... |
| 26126 | 25.6 | 30.6 | 2024-12-10 | 2024-12-10 |
| 26127 | 26.1 | 30.0 | 2024-12-11 | 2024-12-11 |
| 26128 | 25.0 | 30.6 | 2024-12-12 | 2024-12-12 |
| 26129 | 25.6 | 30.6 | 2024-12-13 | 2024-12-13 |
| 26130 | 25.0 | 30.0 | 2024-12-16 | 2024-12-16 |
26131 rows × 4 columns
temp_table = {}
exceed_rates_TMAX = exceedance_rate_for_outbase_period(st_data, "TMAX")
exceed_rates_TMIN = exceedance_rate_for_outbase_period(st_data, "TMIN")
TMAX_dict = dict(zip(exceed_rates_TMAX['DAY'], exceed_rates_TMAX['THRESHOLD']))
TMIN_dict = dict(zip(exceed_rates_TMIN['DAY'], exceed_rates_TMIN['THRESHOLD']))
df_exceed = st_data.copy()
df_exceed['THRESHOLD_TMAX'] = df_exceed['DAY'].apply(lambda day_value:TMAX_dict.get(day_value))
df_exceed['HOT_DAY'] = df_exceed[['TMAX',"THRESHOLD_TMAX"]].apply(lambda x: x["TMAX"] > x["THRESHOLD_TMAX"],axis=1)
df_exceed['THRESHOLD_TMIN'] = df_exceed['DAY'].apply(lambda day_value:TMIN_dict.get(day_value))
df_exceed['COLD_DAY'] = df_exceed[['TMIN',"THRESHOLD_TMIN"]].apply(lambda x: x["TMIN"] < x["THRESHOLD_TMIN"],axis=1)
df_exceed['HOT_DAY'].mean()
np.float64(0.10665493092495504)
df_exceed[df_exceed["DATE"] > datetime(1990, 12, 31)]['HOT_DAY'].mean()
np.float64(0.18227653392582466)
df_exceed['YEAR'] = pd.DatetimeIndex(st_data['DATE']).year
out_of_base_hot = {}
out_of_base_cold = {}
for x in df_exceed["YEAR"].unique():
if x > 1990:
out_of_base_hot[x] = df_exceed[df_exceed["YEAR"] == x]['HOT_DAY'].mean()
out_of_base_cold[x] = df_exceed[df_exceed["YEAR"] == x]['COLD_DAY'].mean()
ex_cold, all_cold = exceedance_rate_for_base_period(st_data, "TMIN")
ex_hot, all_hot = exceedance_rate_for_base_period(st_data, "TMAX")
all_hot = ex_hot|out_of_base_hot
all_cold = ex_cold|out_of_base_cold
cold_bar = sum(ex_cold.values()) / len(ex_cold)
hot_bar = sum(ex_hot.values()) / len(ex_hot)
hot_anom = {}
for x in all_hot:
hot_anom[x] = 100*(all_hot[x]-hot_bar)
cold_anom = {}
for x in all_cold:
cold_anom[x] = 100*(all_cold[x]-cold_bar)
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator
df_cold_anom = pd.DataFrame.from_dict(cold_anom, orient='index', columns=['Perc_Anom'])
df_cold_anom.index = pd.to_datetime(df_cold_anom.index, format='%Y')
df_hot_anom = pd.DataFrame.from_dict(hot_anom, orient='index', columns=['Perc_Anom'])
df_hot_anom.index = pd.to_datetime(df_hot_anom.index, format='%Y')
dict_plot = [{'data' : df_cold_anom, 'var' : 'Perc_Anom', 'ax' : 1, 'label' : 'Cold Days'},
{'data' : df_hot_anom, 'var' : 'Perc_Anom', 'ax' : 1, 'label' : 'Hot Days'}]
fig = plot_timeseries_interactive(dict_plot, trendline=True)